# clean environment
# Removes every object that ls() reports in the current environment (names
# starting with a dot are not listed and therefore survive). This does not
# detach packages or reset options, so it is not a substitute for a fresh
# R session.
rm(list = ls())

# load packages
library(MASS)
library(mice)
library(miceadds)

# load data set with parliamentary inquiries
# (path is relative to the current working directory)
dat <- read.csv(file = "inquiries.csv")

# load crime data
# Only the first worksheet of the workbook is read.
crime_dat <- openxlsx::read.xlsx(
  xlsxFile = "vsberichte_metadata.xlsx",
  sheet = 1
)

# recode extremism variable
# Numeric coding: "left" -> 0, "both" -> 1, "right" -> 2.
# match() returns the 1-based position in the lookup vector (NA for any other
# or missing value), so subtracting 1 yields the 0/1/2 codes.
dat$extremism_reg <- match(dat$extremism_v, c("left", "both", "right")) - 1

# create english party variable
# Translate the raw party codes into English display labels in one step.
# `levels` lists the raw codes, `labels` the matching English names; the
# first level ("CDU/CSU") is the reference category. Codes not listed in
# `levels` become NA.
dat$party_eng <- factor(
  dat$party,
  levels = c("CDU", "AFD", "FDP", "GRUENE", "LINKE", "SPD"),
  labels = c("CDU/CSU", "AfD", "FDP", "Greens", "Left", "SPD")
)

# create decade variable
# Bin each year into the start year of its decade:
#   year < 1960          -> 1950
#   1960 <= year < 1970  -> 1960, ... , 2010 <= year < 2020 -> 2010
#   year >= 2020 or NA   -> NA
# findInterval() returns 0 for values below the first break and 7 for values
# at or above the last one; adding 1 turns that into an index into the
# label vector, whose final entry is NA.
dat$decade <- c(seq(1950, 2010, by = 10), NA)[
  findInterval(dat$year, seq(1960, 2020, by = 10)) + 1
]

# create jurisdiction variable for merging
# Expand the two-letter jurisdiction codes to the lower-case long names that
# are used as merge key against the crime data. Codes that are not in the
# lookup table (or are missing) become NA.
dat$jurisdiction_long <- unname(
  c(
    bb = "brandenburg",
    be = "berlin",
    bt = "bund",
    bw = "baden-württemberg",
    by = "bayern",
    hb = "bremen",
    he = "hessen",
    hh = "hamburg",
    mv = "mecklenburg-vorpommern",
    ni = "niedersachsen",
    nw = "nordrhein-westfalen",
    rp = "rheinland-pfalz",
    sh = "schleswig-holstein",
    sl = "saarland",
    sn = "sachsen",
    st = "sachsen-anhalt",
    th = "thüringen"
  )[as.character(dat$jurisdiction)]
)

# lagged crime numbers
# Copy the two crime counts under "_lag" names, then shift the copy's year
# forward by one so that, after merging on (jurisdiction, year), each row
# carries the counts of the preceding year. Rows without a preceding year in
# the data get NA because the merge keeps all rows of crime_dat.
crime_dat_lag <- setNames(
  crime_dat[, c("jurisdiction", "year", "extreme_crime_right", "extreme_crime_left")],
  c("jurisdiction", "year", "extreme_crime_right_lag", "extreme_crime_left_lag")
)
crime_dat_lag$year <- crime_dat_lag$year + 1
crime_dat <- merge(
  crime_dat,
  crime_dat_lag,
  by = c("jurisdiction", "year"),
  all.x = TRUE
)

# merge with crime data
# Left join: every inquiry is kept; inquiries without a matching
# (jurisdiction, year) entry in the crime data get NA in the crime columns.
dat <- merge(
  x = dat,
  y = crime_dat,
  by.x = c("jurisdiction_long", "year"),
  by.y = c("jurisdiction", "year"),
  all.x = TRUE
)

# create ratio of crime
# Right-wing count divided by left-wing count, for the current and the
# lagged year.
# NOTE(review): a left-wing count of 0 yields Inf (or NaN for 0/0) here --
# confirm that no such rows occur in the data.
dat[["rwe_lwe_crime_ratio"]] <-
  dat[["extreme_crime_right"]] / dat[["extreme_crime_left"]]
dat[["rwe_lwe_crime_ratio_lag"]] <-
  dat[["extreme_crime_right_lag"]] / dat[["extreme_crime_left_lag"]]


# imputation model
# Multiple imputation of the crime ratio, which is merged in per
# jurisdiction-year and therefore treated as a cluster-level variable:
# "group" identifies the jurisdiction-year cluster.
set.seed(1904)
dat$group <- paste0(dat$jurisdiction, "-", dat$year)

# keep only inquiries with an observed outcome
mice_dat <- dat[
  !is.na(dat$extremism_reg),
  c("jurisdiction", "year", "party_eng", "extremism_reg",
    "rwe_lwe_crime_ratio", "group")
]

# mice needs the cluster identifier as an integer
mice_dat$group <- as.integer(as.factor(mice_dat$group))

predM <- mice::make.predictorMatrix(data = mice_dat)
impM <- mice::make.method(data = mice_dat)

# Flag "group" as the cluster variable (code -2) in every imputation model,
# except in its own row. Indexing by name instead of by position keeps this
# correct if the column list of mice_dat is ever changed.
predM1 <- predM
predM1[, "group"] <- -2
predM1["group", "group"] <- 0

# impute the crime ratio at the cluster level
impM1 <- impM
impM1["rwe_lwe_crime_ratio"] <- "2lonly.function"

# define imputation functions
# (predictive mean matching is applied to the aggregated cluster-level data)
imputationFunction <- list("rwe_lwe_crime_ratio" = "pmm")
# define cluster variable
cluster_var <- list("rwe_lwe_crime_ratio" = "group")

# impute: 5 imputed data sets, 5 iterations each
imp <- mice::mice(mice_dat, m = 5, predictorMatrix = predM1, method = impM1,
                  maxit = 5, imputationFunction = imputationFunction,
                  cluster_var = cluster_var)


# regressions
# Linear models of the 0/1/2 extremism score (0 = left, 1 = both, 2 = right)
# with successively richer sets of fixed effects. mod_1 to mod_4 are fitted
# on the merged data `dat`.
mod_1 <- lm(extremism_reg ~ factor(party_eng), data = dat)
mod_2 <- lm(extremism_reg ~ factor(party_eng) + factor(jurisdiction),
            data = dat)
mod_3 <- lm(extremism_reg ~ factor(party_eng) + factor(jurisdiction) +
              factor(decade), data = dat)
mod_4 <- lm(extremism_reg ~ factor(party_eng) + factor(jurisdiction) +
              factor(year), data = dat)

# mod_5 adds the crime ratio: the model is fitted on each imputed data set
# and the estimates are then pooled. The argument is spelled `expr` in full
# rather than relying on partial matching of `exp`.
mod_5 <- pool(with(
  data = imp,
  expr = lm(extremism_reg ~ factor(party_eng) + rwe_lwe_crime_ratio +
              factor(jurisdiction) + factor(year))
))
